{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copyright (C) 2016 - 2019 Pinard Liu(liujianping-ok@163.com)\n",
    "\n",
    "https://www.cnblogs.com/pinard\n",
    "\n",
    "Permission given to modify the code as long as you keep this declaration at the top\n",
    "\n",
    "文本挖掘预处理之TF-IDF https://www.cnblogs.com/pinard/p/6693230.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  (0, 4)\t0.4424621378947393\n",
      "  (0, 15)\t0.697684463383976\n",
      "  (0, 3)\t0.348842231691988\n",
      "  (0, 16)\t0.4424621378947393\n",
      "  (1, 3)\t0.3574550433419527\n",
      "  (1, 14)\t0.45338639737285463\n",
      "  (1, 6)\t0.3574550433419527\n",
      "  (1, 2)\t0.45338639737285463\n",
      "  (1, 9)\t0.45338639737285463\n",
      "  (1, 5)\t0.3574550433419527\n",
      "  (2, 7)\t0.5\n",
      "  (2, 12)\t0.5\n",
      "  (2, 0)\t0.5\n",
      "  (2, 1)\t0.5\n",
      "  (3, 15)\t0.2811316284405006\n",
      "  (3, 6)\t0.2811316284405006\n",
      "  (3, 5)\t0.2811316284405006\n",
      "  (3, 13)\t0.3565798233381452\n",
      "  (3, 17)\t0.3565798233381452\n",
      "  (3, 18)\t0.3565798233381452\n",
      "  (3, 11)\t0.3565798233381452\n",
      "  (3, 8)\t0.3565798233381452\n",
      "  (3, 10)\t0.3565798233381452\n"
     ]
    }
   ],
   "source": [
    "from sklearn.feature_extraction.text import TfidfTransformer  \n",
    "from sklearn.feature_extraction.text import CountVectorizer  \n",
    "\n",
    "corpus=[\"I come to China to travel\", \n",
    "    \"This is a car polupar in China\",          \n",
    "    \"I love tea and Apple \",   \n",
    "    \"The work is to write some papers in science\"] \n",
    "\n",
    "vectorizer=CountVectorizer()\n",
    "\n",
    "transformer = TfidfTransformer()\n",
    "tfidf = transformer.fit_transform(vectorizer.fit_transform(corpus))  \n",
    "print (tfidf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "  (0, 4)\t0.4424621378947393\n",
      "  (0, 15)\t0.697684463383976\n",
      "  (0, 3)\t0.348842231691988\n",
      "  (0, 16)\t0.4424621378947393\n",
      "  (1, 3)\t0.3574550433419527\n",
      "  (1, 14)\t0.45338639737285463\n",
      "  (1, 6)\t0.3574550433419527\n",
      "  (1, 2)\t0.45338639737285463\n",
      "  (1, 9)\t0.45338639737285463\n",
      "  (1, 5)\t0.3574550433419527\n",
      "  (2, 7)\t0.5\n",
      "  (2, 12)\t0.5\n",
      "  (2, 0)\t0.5\n",
      "  (2, 1)\t0.5\n",
      "  (3, 15)\t0.2811316284405006\n",
      "  (3, 6)\t0.2811316284405006\n",
      "  (3, 5)\t0.2811316284405006\n",
      "  (3, 13)\t0.3565798233381452\n",
      "  (3, 17)\t0.3565798233381452\n",
      "  (3, 18)\t0.3565798233381452\n",
      "  (3, 11)\t0.3565798233381452\n",
      "  (3, 8)\t0.3565798233381452\n",
      "  (3, 10)\t0.3565798233381452\n"
     ]
    }
   ],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "tfidf2 = TfidfVectorizer()\n",
    "re = tfidf2.fit_transform(corpus)\n",
    "print (re)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
